library(foreign)
library(readstata13)
library(ri)
library(sandwich)
library(ggplot2)
library(reshape2)
library(stringi)
library(xtable)
library(lmtest)
library(ggmap)
library(lfe)
library(stargazer)
library(fields)


##Difference in means function

mean_diff <- function(outcome, treat){
  
  #Calculating difference in means (ATE)
  mean1 <- mean(outcome[treat == 1], na.rm = T)
  mean0 <- mean(outcome[treat == 0], na.rm = T)
  ATE <- mean1 - mean0
  
  # and calculating their variance
  var1 <- sum((outcome[treat == 1] - mean1)^2, na.rm = TRUE) / (length(outcome[treat == 0]) - 1)
  var1
  
  var0 <- sum((outcome[treat == 1] - mean0)^2, na.rm = TRUE) / (length(outcome[treat == 0]) - 1)
  var0
  
  # and with this we can estimate the SE of the difference of means
  estimated_se <- sqrt((var1/length(outcome[treat == 1]) + var0/length(outcome[treat == 0])))
  estimated_se
  
  # converting to standard units
  t_stat <- (ATE - 0) / estimated_se
  t_stat
  
  # To be able to get the right Student t Distribution, we need to calculate
  # the degrees of freedom (Satterthwaite)
  df <- (var1/length(outcome[treat == 1]) + var0/length(outcome[treat == 0]))^2 / 
    ((var1/length(outcome[treat == 1]))^2 / (length(outcome[treat == 1]) - 1) + 
       (var0/length(outcome[treat == 0]))^2 / (length(outcome[treat == 0]) - 1))
  
  # One tailed p-value: 
  p_one <- pt(t_stat, df = df, ncp = 0, lower.tail = F)  
  # plus two tailed p-value:
  p_two <- 2 * pt(-abs(t_stat), df = df)
  
  
  ##Regression output to get robust SEs
  
  reg <- lm(outcome~treat)
  
  se_robust <- diag(vcovHC(reg, type = "HC"))[2]^0.5
  
  t_stat2 <- (ATE - 0) / se_robust
  
  # plus two tailed p-value:
  p_two_robust <- 2 * pt(-abs(t_stat2), df=df)
  
  ##Randomization inference
  #p_ri <- PermutationTest(outcome[treat == 1],
                          #outcome[treat == 0])
  
  ##KS Test
  ks <- ks.test(outcome[treat == 0], 
                outcome[treat == 1])$p.value
  
  #Preparing output
  res <- list(round(mean1, 3), round(mean0, 3), 
              round(ATE, 3), round(estimated_se, 4), 
              round(p_two, 4), round(se_robust, 4), 
              round(p_two_robust, 4), 
              #round(p_ri, 4), 
              round(ks, 4), round(df,0))
  
  names(res) <- c("mean_treat", "mean_control", "ate",
                  "se_conventional", "p_conventional",
                  "se_robust", "p_robust", 
                  #"p_randomization", 
                  "ks", "df")
  
  return(c(res))
}


plot_iv_itt_2 <- function(var, treat, fe, clust, 
                          comp, het, names, level, 
                          xlab, ylab){
  
  library(lmtest)
  
  coef <- matrix(NA, 2, ncol(het))
  
  se <- matrix(NA, 2, ncol(het))
  
  
  for ( i in 1:ncol(het)){
    
    model <- felm(var~treat|fe|0|clust, subset = het[,i] == 1)
    
    coefs <- coeftest(model, vcovHC = "HC0")
    
    model2 <- felm(var~treat|fe|0|clust, subset = het[,i] == 0)
    
    coefs2 <- coeftest(model2, vcovHC = "HC0")
    
    coef[,i] <- c(coefs[1], coefs2[1])
    
    se[,i] <- c(coefs[2], coefs2[2])
    
    
  }
  
  myfunc <- function(v1) {
    deparse(substitute(v1))
  }
  
  outs <- cbind.data.frame(factor(rep(names, each = 2), 
                                  levels = unique(names)),
                           factor(level, levels = 
                                    unique(level)), 
                           as.vector(coef), as.vector(se))
  
  names(outs) <- c("Category", "bw", "point_est", "se")
  
  library(ggplot2)
  
  g <- ggplot(outs, aes(x=bw, y=point_est, group = bw)) +
    
    geom_errorbar(aes(ymin=point_est - (1.96 * se), 
                      ymax=point_est + (1.96*se)), width=0, 
                  lwd = 0.5) +
    
    geom_errorbar(aes(ymin=point_est - (1.645 * se), 
                      ymax=point_est + (1.645*se)), 
                  width=0, lwd = 1.25) +
    
    geom_hline(yintercept = 0, lty = 2, lwd = 1) +
    
    geom_point(aes(y = point_est), cex = 2) +
    
    labs(x = xlab, y = ylab) +
    
    theme_bw()  + 
    
    facet_wrap(~Category, strip.position = "bottom", 
               scales = "free_x")+
    
    theme(axis.text=element_text(size=18),
          axis.title=element_text(size=18,face="bold"),
          strip.text.x = element_text(size=18))
  
  return(print(g))
} 


plot_iv_itt_3 <- function(var, treat, fe, clust, comp, het, names, level, xlab, ylab){
  
  library(lmtest)
  
  coef <- matrix(NA, 3, ncol(het))
  
  se <- matrix(NA, 3, ncol(het))
  
  for ( i in 1:ncol(het)){
    
    model <- felm(var~treat|0|0|clust, data, subset = het[,i]  ==  1)
    
    coefs <- coeftest(model, vcovHC = "HC0")
    
    model2 <- felm(var~treat|0|0|clust, subset = het[,i] == 0)
    
    coefs2 <- coeftest(model2, vcovHC = "HC0")
    
    model3 <- felm(var~treat|0|0|clust, subset = het[,i] == 2)
    
    coefs3 <- coeftest(model3, vcovHC = "HC0")
    
    coef[,i] <- c(coefs2[2], coefs[2], coefs3[2])
    
    se[,i] <- c(coefs2[4], coefs[4], coefs3[4])  
    
  }
  
  myfunc <- function(v1) {
    deparse(substitute(v1))
  }
  
  outs <- cbind.data.frame(factor(rep(names, each = 3), 
                                  levels = unique(names)),
                           factor(level, levels = 
                                    unique(level)), 
                           as.vector(coef), as.vector(se))
  
  names(outs) <- c("Category", "bw", "point_est", "se")
  
  library(ggplot2)
  
  g <- ggplot(outs, aes(x=bw, y=point_est, group = bw)) + geom_errorbar(aes(ymin=point_est - (1.96 * se), 
                                                                            ymax=point_est + (1.96*se)), width=0, lwd = 0.5) +
    
    geom_errorbar(aes(ymin=point_est - (1.645 * se), 
                      ymax=point_est + (1.645*se)), 
                  width=0, lwd = 1.25) +
    
    geom_hline(yintercept = 0, lty = 2, lwd = 1) +
    
    geom_point(aes(y = point_est), cex = 2) +
    
    labs(x = xlab, 
         y = ylab) +
    
    theme_bw()  + 
    
    facet_wrap(~Category, strip.position = "bottom", 
               scales = "free_x")+
    theme(axis.text=element_text(size=18),
          axis.title=element_text(size=18,face="bold"),
          strip.text.x = element_text(size=18))
  
  return(print(g))
}


plot_iv_cace_3 <- function(var, treat, fe, clust, 
                           comp, het, names, level, 
                           xlab, ylab){
  
  library(lmtest)
  
  coef <- matrix(NA, 3, ncol(het))
  
  se <- matrix(NA, 3, ncol(het))
  
  for ( i in 1:ncol(het)){
    
    model <- felm(var~1|0|(comp~treat)|clust, 
                  subset = het[,i]  ==  1)
    
    coefs <- coeftest(model, vcovHC = "HC0")
    
    model2 <- felm(var~1|0|(comp~treat)|clust, 
                   subset = het[,i] == 0)
    
    coefs2 <- coeftest(model2, vcovHC = "HC0")
    
    
    model3 <- felm(var~treat|0|0|clust, 
                   subset = het[,i] == 2)
    
    coefs3 <- coeftest(model3, vcovHC = "HC0")
    
    coef[,i] <- c(coefs2[2], coefs[2], coefs3[2])
    
    se[,i] <- c(coefs2[4], coefs[4], coefs3[4])  
    
  }
  
  myfunc <- function(v1) {
    deparse(substitute(v1))
  }
  
  outs <- cbind.data.frame(factor(rep(names, each = 3), 
                                  levels = unique(names)),
                           factor(level, levels = 
                                    unique(level)), 
                           as.vector(coef), as.vector(se))
  
  names(outs) <- c("Category", "bw", "point_est", "se")
  
  library(ggplot2)
  
  g <- ggplot(outs, aes(x=bw, y=point_est, group = bw)) +
    geom_errorbar(aes(ymin=point_est - (1.96 * se), 
                      ymax=point_est + (1.96*se)), 
                  width=0, lwd = 0.5) +
    geom_errorbar(aes(ymin=point_est - (1.645 * se), 
                      ymax=point_est + (1.645*se)), 
                  width=0, lwd = 1.25) +
    geom_hline(yintercept = 0, lty = 2, lwd = 1) +
    geom_point(aes(y = point_est), cex = 2) +
    labs(x = xlab, 
         y = ylab) +
    theme_bw()  + 
    facet_wrap(~Category, strip.position = "bottom", 
               scales = "free_x")+
    theme(axis.text=element_text(size=18),
          axis.title=element_text(size=18,face="bold"),
          strip.text.x = element_text(size=18))
  
  return(print(g))
}


ttest  <-  function(outcome, treat){
  
  #Calculating difference in means (ATE)
  mean1 <- mean(outcome[treat == 1], na.rm = T)
  mean0 <- mean(outcome[treat == 0], na.rm = T)
  ATE <- mean1 - mean0
  
  # and calculating their variance
  var1 <- sum((outcome[treat == 1] - mean1)^2, na.rm = TRUE) /
    (length(outcome[treat == 1 & is.na(outcome) == FALSE& is.na(treat) == FALSE]) - 1)
  var1
  
  var0 <- sum((outcome[treat == 0] - mean0)^2, na.rm = TRUE) / (length(outcome[treat == 0 & is.na(outcome) == FALSE& is.na(treat) == FALSE]) - 1)
  var0
  
  # and with this we can estimate the SE of the difference of means
  estimated_se <- sqrt((var1/length(outcome[treat == 1 & is.na(outcome) == FALSE  & is.na(treat) == FALSE])) + (var0/length(outcome[treat == 0 & is.na(outcome) == FALSE& is.na(treat) == FALSE])))
  estimated_se
  
  se_treat <- var1/length(outcome[treat == 1 & 
                                    is.na(outcome) == FALSE &
                                    is.na(treat) == FALSE])
  
  se_control <- var0/length(outcome[treat == 0 & 
                                      is.na(outcome) == FALSE &
                                      is.na(treat) == FALSE])
  # converting to standard units
  t_stat <- (ATE - 0) / estimated_se
  t_stat
  
  # To be able to get the right Student t Distribution, we need to calculate
  # the degrees of freedom (Satterthwaite)
  df <- (var1/length(outcome[treat == 1]) + 
           var0/length(outcome[treat == 0]))^2 / 
    ((var1/length(outcome[treat == 1]))^2 / 
       (length(outcome[treat == 1]) - 1) + 
       (var0/length(outcome[treat == 0]))^2 / 
       (length(outcome[treat == 0]) - 1))
  
  # One tailed p-value: 
  p_one <- pt(t_stat, df=df, ncp=0, lower.tail = F)
  
  # plus two tailed p-value:
  p_two <- 2 * pt(-abs(t_stat), df=df)
  
  #Preparing output
  res  <-  list(round(ATE, 3), round(estimated_se, 4), round(p_two, 8), round(p_one, 8), round(df,0), round(se_treat, 4), round(se_control, 4), round(mean1, 4), round(mean0, 4))
  names(res)  <-  c("ATE", "SE", "P Value (two-tailed)", "P Value (one-tailed)", "DF", "se_treat", 
                    "se_control", 
                    "treat_mean", "control_mean")
  
  return(c(res))
}
